/* Copyright 2012 Tobias Marschall
 *
 * This file is part of CLEVER.
 *
 * CLEVER is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CLEVER is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CLEVER.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef READRECORD_H_
#define READRECORD_H_

#include <iostream>
#include <vector>

#include "../SplitAligner.h"
#include "../ShortDnaSequence.h"
#include "../NamedDnaSequence.h"
#include "../OverlappingRegions.h"
#include "Mapping.h"

/** Record on one read (end). */
class Read {
public:
	typedef struct anchor_t {
		int ref_id;
		int position;
		// does anchor refer to the reverse complement of the read
		bool reverse;
		// length of the query that is known to match at that position
		int length;
		// does the anchor stem from a mapping of the whole read?
		bool whole_read;
		// was anchor given as input? (or found during anchor search?)
		bool input_anchor;
		SplitAligner::anchor_alignment_t* alignment;
		anchor_t(int ref_id, int position, bool reverse, int length, bool whole_read, bool input_anchor) : ref_id(ref_id), position(position), reverse(reverse), length(length), whole_read(whole_read), input_anchor(input_anchor), alignment(0) {}
	} anchor_t;
	typedef struct anchor_comparator_t {
		bool operator()(const anchor_t& a1, const anchor_t& a2) {
			if (a1.ref_id != a2.ref_id) return a1.ref_id < a2.ref_id;
			if (a1.position != a2.position) return a1.position < a2.position;
			return a1.length > a2.length;
		}
	} anchor_comparator_t;
	typedef struct mapping_comparator_t {
		bool operator()(const Mapping* m1, const Mapping* m2) {
			if (m1->getRefId() != m2->getRefId()) return m1->getRefId() < m2->getRefId();
			if (m1->getStartPosition() != m2->getStartPosition()) return m1->getStartPosition() < m2->getStartPosition();
			if (m1->getEndPosition() != m2->getEndPosition()) return m1->getEndPosition() < m2->getEndPosition();
			return m1->getPhredScore() > m2->getPhredScore();
		}
	} mapping_comparator_t;
	typedef struct mapping_comparator_phredbased_t {
		bool operator()(const Mapping* m1, const Mapping* m2) {
			if (m1->getPhredScore() != m2->getPhredScore()) return m1->getPhredScore() < m2->getPhredScore();
			if (m1->getRefId() != m2->getRefId()) return m1->getRefId() < m2->getRefId();
			if (m1->getStartPosition() != m2->getStartPosition()) return m1->getStartPosition() < m2->getStartPosition();
			return m1->getEndPosition() < m2->getEndPosition();
		}
	} mapping_comparator_phredbased_t;
	/** Four sets of regions to be searched for left/right anchors on forward/backward strand. 
	 *  These regions are those positions where an anchor position is suspected, i.e. the actual
	 *  anchor might lie partly outside these regions.
	 */
	typedef struct anchor_search_regions_t {
		OverlappingRegions left;
		OverlappingRegions left_reverse;
		OverlappingRegions right;
		OverlappingRegions right_reverse;
		void add(const anchor_search_regions_t& regions) {
			left.add(regions.left);
			left_reverse.add(regions.left_reverse);
			right.add(regions.right);
			right_reverse.add(regions.right_reverse);
		}
		void subtract(const anchor_search_regions_t& regions) {
			left.subtract(regions.left);
			left_reverse.subtract(regions.left_reverse);
			right.subtract(regions.right);
			right_reverse.subtract(regions.right_reverse);
		}
	} anchor_search_regions_t;
private:
	ShortDnaSequence seq;
	std::vector<anchor_t> left_anchors;
	std::vector<anchor_t> right_anchors;
	std::vector<Mapping*> mappings;
	bool is_sorted;
	/** Tests whether this anchor can be extended into a valid mapping of the whole reads and,
	 *  if so, adds the alignment to mappings. */
	void checkForWholeMapping(const anchor_t& anchor, const SplitAligner& aligner);
	typedef enum { LEFT_ANCHOR, RIGHT_ANCHOR } anchor_type_t;
	typedef enum { FORWARD, REVCOMP } strandedness_t;
	void findNewAnchors(anchor_type_t anchor_type, strandedness_t strand, const OverlappingRegions& regions, const std::vector<NamedDnaSequence*>& references, int anchor_length, int allowed_errors);
	void removeDuplicateAnchors(std::vector<anchor_t>& anchor_list);
	void removeDuplicateMappings();
	void resetAnchorList(std::vector<anchor_t>& anchor_list);
public:
	Read();
	/** Copy constructor. */
	Read(const Read&);
	~Read();

	/** Store known alignments of the whole read. If just_sequence is true, only read sequence and qualities are
	 *  stored, but not the alignment(s).
	 */
	void addWholeAlignments(const std::vector<BamTools::BamAlignment>& alignments, const std::vector<NamedDnaSequence*>& references, bool just_sequence = false);
	/** Stores original sequence and qualities of whole (unsplit) read. */
	void addSequenceAndQualities(const std::string& dna, const std::string& qualities);
	/** Add a known left anchor, that is, a position where a prefix of a read matches. */
	void addLeftAnchor(int ref_id, int position, bool reverse, int length, bool whole_read);
	/** Add a known right anchor, that is, a position where a suffix of a read matches. */
	void addRightAnchor(int ref_id, int position, bool reverse, int length, bool whole_read);
	/** Determine the regions to be searched for anchors based on mate and already existing anchors.
	 *  If mate is 0, then regions are only based on already existing anchors. */
	std::auto_ptr<anchor_search_regions_t> determineAnchorSearchRegions(int search_distance, int min_anchor_length, const Read* mate);
	/** Search for anchors in the specified regions. */
	void findNewAnchors(const anchor_search_regions_t& regions, const std::vector<NamedDnaSequence*>& references, int anchor_length, int allowed_errors);
	/** Compute alignments from anchors; tries produce split alignments as well as unsplit alignments and
	 *  stores all found alignments.*/
	void computeAlignments(const std::vector<NamedDnaSequence*>& references, const SplitAligner& aligner, int max_span, bool interchromosomal, int max_interchromosomal_pairs);
	/** Sorts all mappings by PHRED score, ties are resolved by position. */
	void sortMappings();
	/** Removes all alignments with a PHRED score worse than the given value. 
	 *  Mappings must have been sorted by sortMappings().
	 */
	void filterByPhredScore(int max_phred);
	/** Discards all anchors found during anchor search and just retains those given as input. */
	void resetAnchorList();
	/** Discards all anchors. */
	void clearAnchorList();

	const std::vector<anchor_t>& leftAnchors() const { return left_anchors; }
	const std::vector<anchor_t>& rightAnchors() const { return right_anchors; }
	const std::vector<Mapping*>& getMappings() const { return mappings; }
	const ShortDnaSequence& getSequence() const { return seq; }
	friend std::ostream& operator<<(std::ostream&, const Read&);
};

std::ostream& operator<<(std::ostream&, const Read::anchor_search_regions_t&);

#endif /* READRECORD_H_ */
